Unittest the whitespace stripping logic for tox text generation
The separate class allows this functionality to be unit-tested in isolation.
Change-Id: I1e5eddfb455ca85a662ea38c03302883decc5d58
Reviewed-on: https://gerrit.libreoffice.org/9608
Tested-by: Caolán McNamara <caolanm@redhat.com>
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
diff --git a/sw/CppunitTest_sw_tox.mk b/sw/CppunitTest_sw_tox.mk
new file mode 100644
index 0000000..f372442
--- /dev/null
+++ b/sw/CppunitTest_sw_tox.mk
@@ -0,0 +1,50 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This file contains the unit test definition for classes in the sw/source/core/tox subfolder
# The macro which defines the main method is contained in test_ToxWhitespaceStripper.cxx
$(eval $(call gb_CppunitTest_CppunitTest,sw_tox_test))
$(eval $(call gb_CppunitTest_add_exception_objects,sw_tox_test, \
sw/qa/cppunit/tox/test_ToxWhitespaceStripper \
))
$(eval $(call gb_CppunitTest_use_libraries,sw_tox_test, \
comphelper \
cppu \
cppuhelper \
sal \
svt \
sw \
test \
unotest \
vcl \
tl \
utl \
$(gb_UWINAPI) \
))
$(eval $(call gb_CppunitTest_use_externals,sw_tox_test, \
boost_headers \
libxml2 \
))
$(eval $(call gb_CppunitTest_use_api,sw_tox_test,\
offapi \
udkapi \
))
$(eval $(call gb_CppunitTest_set_include,sw_tox_test,\
-I$(SRCDIR)/sw/inc \
-I$(SRCDIR)/sw/source/core/inc \
$$(INCLUDE) \
))
# vim: set noet sw=4 ts=4:
diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk
index c9804a0..a7500ad 100644
--- a/sw/Library_sw.mk
+++ b/sw/Library_sw.mk
@@ -389,6 +389,7 @@
sw/source/core/tox/toxhlp \
sw/source/core/tox/txmsrt \
sw/source/core/tox/ToxTextGenerator \
sw/source/core/tox/ToxWhitespaceStripper \
sw/source/core/txtnode/SwGrammarContact \
sw/source/core/txtnode/atrfld \
sw/source/core/txtnode/atrflyin \
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index 53a19bb..7e71e2c 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -46,6 +46,10 @@
endif
$(eval $(call gb_Module_add_check_targets,sw,\
CppunitTest_sw_tox \
))
$(eval $(call gb_Module_add_slowcheck_targets,sw,\
CppunitTest_sw_uwriter \
CppunitTest_sw_htmlexport \
diff --git a/sw/inc/ToxWhitespaceStripper.hxx b/sw/inc/ToxWhitespaceStripper.hxx
new file mode 100644
index 0000000..96b254b
--- /dev/null
+++ b/sw/inc/ToxWhitespaceStripper.hxx
@@ -0,0 +1,52 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef TOXWHITESPACESTRIPPER_HXX_
#define TOXWHITESPACESTRIPPER_HXX_
#include "rtl/ustring.hxx"
#include <vector>
namespace sw {
/** This class helps to remove unwanted whitespace from a string to use in a Tox.
 *
 * The stripped string is computed once, in the constructor, and has
 * - Newlines changed to spaces
 * - Consecutive spaces merged
 * - Trailing spaces removed
 *
 * It also allows finding the corresponding new positions of the input string in the stripped string.
 * This is important for attributes which might have to be imported, e.g., it helps to answer the question:
 * The 3rd character of the input string is subscript, which character in the output string is that?
 *
 * @note One leading whitespace is preserved.
 */
class SAL_DLLPUBLIC ToxWhitespaceStripper {
public:
    /** Strips @p inputString and records, for each of its positions, the position in the stripped string.
     *
     * Declared explicit so that a string is never silently converted into a stripper.
     */
    explicit ToxWhitespaceStripper(const OUString& inputString);

    /** Maps a position of the input string to the corresponding position of the stripped string.
     *
     * @param pos position in the input string. The position directly after the last character
     *            (i.e., the length of the input string) may be requested as well.
     * @return the corresponding position in the stripped string.
     * @throws std::out_of_range if @p pos is larger than the length of the input string.
     * @throws boost::numeric::bad_numeric_cast if @p pos is negative.
     */
    sal_Int32
    GetPositionInStrippedString(sal_Int32 pos) const;

    /** @return the input string with the whitespace stripped as described for this class. */
    OUString
    GetStrippedString() const;

private:
    /// The stripped string.
    OUString mStripped;
    /// Entry i holds the position in the stripped string which belongs to position i of the input string.
    std::vector<sal_Int32> mNewPositions;
};
} // end namespace sw
#endif /* TOXWHITESPACESTRIPPER_HXX_ */
diff --git a/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx
new file mode 100644
index 0000000..fdbd47c
--- /dev/null
+++ b/sw/qa/cppunit/tox/test_ToxWhitespaceStripper.cxx
@@ -0,0 +1,150 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <stdexcept>
#include <sal/types.h>
#include <rtl/ustring.hxx>
#include <ToxWhitespaceStripper.hxx>
#include <cppunit/TestAssert.h>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/plugin/TestPlugIn.h>
using namespace sw;
/// Test fixture for sw::ToxWhitespaceStripper: covers the stripped string and the position mapping.
class ToxWhitespaceStripperTest : public CppUnit::TestFixture
{
    // The test cases; their definitions follow the fixture.
    void MappingCharactersToVariousStrippedStringsWorks();
    void StrippingWhitespacesFromVariousStringsWorks();
    void PositionAfterStringCanBeRequested();

    // Registration of the test cases, in declaration order.
    CPPUNIT_TEST_SUITE(ToxWhitespaceStripperTest);
    CPPUNIT_TEST(MappingCharactersToVariousStrippedStringsWorks);
    CPPUNIT_TEST(StrippingWhitespacesFromVariousStringsWorks);
    CPPUNIT_TEST(PositionAfterStringCanBeRequested);
    CPPUNIT_TEST_SUITE_END();
};
void
ToxWhitespaceStripperTest::MappingCharactersToVariousStrippedStringsWorks()
{
{
OUString test("abc\n");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
}
{
OUString test("abc\n\n");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
}
{
OUString test("abc\ndef");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(1));
CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(2));
CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(3));
CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(4));
CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(5));
CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(6));
}
{
// 012345 6789
OUString test(" abc \ndef");
// 01234567
// " abc def"
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(0));
CPPUNIT_ASSERT_EQUAL(0, sut.GetPositionInStrippedString(1));
CPPUNIT_ASSERT_EQUAL(1, sut.GetPositionInStrippedString(2));
CPPUNIT_ASSERT_EQUAL(2, sut.GetPositionInStrippedString(3));
CPPUNIT_ASSERT_EQUAL(3, sut.GetPositionInStrippedString(4));
CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(5));
CPPUNIT_ASSERT_EQUAL(4, sut.GetPositionInStrippedString(6));
CPPUNIT_ASSERT_EQUAL(5, sut.GetPositionInStrippedString(7));
CPPUNIT_ASSERT_EQUAL(6, sut.GetPositionInStrippedString(8));
CPPUNIT_ASSERT_EQUAL(7, sut.GetPositionInStrippedString(9));
}
}
void
ToxWhitespaceStripperTest::StrippingWhitespacesFromVariousStringsWorks()
{
{
OUString test("abc\n");
OUString expected("abc");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test("abc\n\n");
OUString expected("abc");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test("abc\ndef");
OUString expected("abc def");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test(" abc \ndef");
OUString expected(" abc def");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test(" ");
OUString expected("");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
{
OUString test("d ");
OUString expected("d");
ToxWhitespaceStripper sut(test);
CPPUNIT_ASSERT_EQUAL(expected, sut.GetStrippedString());
}
}
void
ToxWhitespaceStripperTest::PositionAfterStringCanBeRequested()
{
OUString test("abc");
ToxWhitespaceStripper sut(test);
sal_Int32 expected = test.getLength();
CPPUNIT_ASSERT_EQUAL(expected, sut.GetPositionInStrippedString(test.getLength()));
}
// Put the test suite in the registry
CPPUNIT_TEST_SUITE_REGISTRATION(ToxWhitespaceStripperTest);
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/core/tox/ToxTextGenerator.cxx b/sw/source/core/tox/ToxTextGenerator.cxx
index 54d7b6a..8554c88 100644
--- a/sw/source/core/tox/ToxTextGenerator.cxx
+++ b/sw/source/core/tox/ToxTextGenerator.cxx
@@ -33,6 +33,7 @@
#include "fmtpdsc.hxx"
#include "DocumentSettingManager.hxx"
#include "SwStyleNameMapper.hxx"
#include "ToxWhitespaceStripper.hxx"
#include "editeng/tstpitem.hxx"
#include "editeng/lrspitem.hxx"
@@ -49,32 +50,6 @@
nEndTextPos(nEnd) {}
};
/// Returns a copy of rRet in which newlines are changed to spaces, consecutive
/// spaces are reduced to a single space, and a trailing space is removed.
OUString lcl_RemoveLineBreaks(const OUString &rRet)
{
    if (rRet.isEmpty())
        return rRet;

    OUStringBuffer aBuf(rRet.replace('\n', ' '));
    // Only characters are overwritten below, so the length of the buffer stays the same.
    const sal_Int32 nLen = aBuf.getLength();
    // Number of characters dropped so far; kept characters move to the left by this amount.
    sal_Int32 nDropped = 0;
    for (sal_Int32 nPos = 1; nPos < nLen; ++nPos)
    {
        const bool bRepeatedSpace = aBuf[nPos - 1] == ' ' && aBuf[nPos] == ' ';
        if (bRepeatedSpace)
            ++nDropped;
        else
            aBuf[nPos - nDropped] = aBuf[nPos];
    }
    // The last slot is only ever overwritten with its own value, so it still holds the
    // last character of the input here.
    if (aBuf[nLen - 1] == ' ')
        ++nDropped;
    return aBuf.copy(0, nLen - nDropped).toString();
}
/// Generate String according to the Form and remove the
/// special characters 0-31 and 255.
static OUString lcl_GetNumString( const SwTOXSortTabBase& rBase, bool bUsePrefix, sal_uInt8 nLevel )
@@ -144,8 +119,8 @@
case TOKEN_ENTRY_TEXT:
{
SwIndex aIdx( pTOXNd, std::min(pTOXNd->GetTxt().getLength(),rTxt.getLength()) );
rBase.FillText( *pTOXNd, aIdx );
rTxt = lcl_RemoveLineBreaks(rTxt);
ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
}
break;
@@ -153,10 +128,9 @@
{
// for TOC numbering
rTxt += lcl_GetNumString( rBase, true, MAXLEVEL );
SwIndex aIdx( pTOXNd, rTxt.getLength() );
rBase.FillText( *pTOXNd, aIdx );
rTxt = lcl_RemoveLineBreaks(rTxt);
ToxWhitespaceStripper stripper(rBase.GetTxt().sText);
pTOXNd->InsertText(stripper.GetStrippedString(), aIdx);
}
break;
diff --git a/sw/source/core/tox/ToxWhitespaceStripper.cxx b/sw/source/core/tox/ToxWhitespaceStripper.cxx
new file mode 100644
index 0000000..b01c92c
--- /dev/null
+++ b/sw/source/core/tox/ToxWhitespaceStripper.cxx
@@ -0,0 +1,62 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "ToxWhitespaceStripper.hxx"
#include "rtl/ustrbuf.hxx"
#include <boost/numeric/conversion/cast.hpp>
namespace sw {
/** Computes the stripped string and the mapping of input positions to stripped positions.
 *
 * Spaces and newlines count as whitespace. Each run of whitespace becomes a single space,
 * and a space at the very end of the result is removed.
 *
 * @param inputString the string to strip.
 */
ToxWhitespaceStripper::ToxWhitespaceStripper(const OUString& inputString)
{
    OUStringBuffer buffer;
    // One entry per input character, plus one for the position after the string.
    mNewPositions.reserve(static_cast<std::vector<sal_Int32>::size_type>(inputString.getLength()) + 1);

    bool lastCharacterWasWhitespace = false;
    for (sal_Int32 pos = 0; pos < inputString.getLength(); ++pos) {
        const sal_Unicode cur = inputString[pos];
        if (cur == ' ' || cur == '\n') {
            // merge consecutive whitespaces (and translate them to spaces)
            if (!lastCharacterWasWhitespace) {
                buffer.append(' ');
            }
            lastCharacterWasWhitespace = true;
        }
        else {
            buffer.append(cur);
            lastCharacterWasWhitespace = false;
        }
        // The character was either appended or merged into the space at the end of the buffer.
        mNewPositions.push_back(buffer.getLength()-1);
    }
    // strip the last whitespace (if there was one)
    if (lastCharacterWasWhitespace) {
        buffer.truncate(buffer.getLength() - 1);
    }
    // Add one position if the position after the stripped string is requested, e.g., for attributes which
    // extend beyond the string. This has to happen after the truncation; otherwise the entry would point
    // one position past the end of the stripped string whenever the input ends with whitespace.
    mNewPositions.push_back(buffer.getLength());
    // Hand over the buffer content directly instead of copying it via a NUL-terminated pointer.
    mStripped = buffer.makeStringAndClear();
}
/** Looks up the position in the stripped string which corresponds to @p pos in the input string.
 *
 * Invalid positions are reported by exceptions: boost::numeric_cast throws for a negative
 * @p pos, and std::vector::at() throws std::out_of_range for a position which was not recorded.
 */
sal_Int32
ToxWhitespaceStripper::GetPositionInStrippedString(sal_Int32 pos) const
{
    return mNewPositions.at(boost::numeric_cast<size_t>(pos));
}
/// Returns the stripped string which was computed in the constructor.
OUString ToxWhitespaceStripper::GetStrippedString() const
{
    return mStripped;
}
}